#import numpy
import numpy as np
#import pandas
import pandas as pd
#import seaborn
import seaborn as sns
# import matplotlib.pyplot
import matplotlib.pyplot as plt
# import the data set
df_house_price_data = pd.read_csv("C:\\Users\\jemin\\OneDrive\\Desktop\\SEM 6\\Pythonscript RNGPIT\\House_Price.csv")
# show the first observation of the dataset
df_house_price_data.head()
| price | crime_rate | resid_area | air_qual | room_num | age | dist1 | dist2 | dist3 | dist4 | teachers | poor_prop | airport | n_hos_beds | n_hot_rooms | waterbody | rainfall | bus_ter | parks | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 6.575 | 65.2 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | YES | 5.480 | 11.1920 | River | 23 | YES | 0.049347 |
| 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 6.421 | 78.9 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | NO | 7.332 | 12.1728 | Lake | 42 | YES | 0.046146 |
| 2 | 34.7 | 0.02729 | 37.07 | 0.469 | 7.185 | 61.1 | 5.03 | 4.86 | 5.01 | 4.97 | 22.2 | 4.03 | NO | 7.394 | 101.1200 | None | 38 | YES | 0.045764 |
| 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 6.998 | 45.8 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | YES | 9.268 | 11.2672 | Lake | 45 | YES | 0.047151 |
| 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 7.147 | 54.2 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | NO | 8.824 | 11.2896 | Lake | 55 | YES | 0.039474 |
# show last five observation of the dataset
df_house_price_data.tail()
| price | crime_rate | resid_area | air_qual | room_num | age | dist1 | dist2 | dist3 | dist4 | teachers | poor_prop | airport | n_hos_beds | n_hot_rooms | waterbody | rainfall | bus_ter | parks | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 501 | 22.4 | 0.06263 | 41.93 | 0.573 | 6.593 | 69.1 | 2.64 | 2.45 | 2.76 | 2.06 | 19.0 | 9.67 | NO | 9.348 | 12.1792 | Lake and River | 27 | YES | 0.056006 |
| 502 | 20.6 | 0.04527 | 41.93 | 0.573 | 6.120 | 76.7 | 2.44 | 2.11 | 2.46 | 2.14 | 19.0 | 9.08 | YES | 6.612 | 13.1648 | Lake and River | 20 | YES | 0.059903 |
| 503 | 23.9 | 0.06076 | 41.93 | 0.573 | 6.976 | 91.0 | 2.34 | 2.06 | 2.29 | 1.98 | 19.0 | 5.64 | NO | 5.478 | 12.1912 | None | 31 | YES | 0.057572 |
| 504 | 22.0 | 0.10959 | 41.93 | 0.573 | 6.794 | 89.3 | 2.54 | 2.31 | 2.40 | 2.31 | 19.0 | 6.48 | YES | 7.940 | 15.1760 | None | 47 | YES | 0.060694 |
| 505 | 19.0 | 0.04741 | 41.93 | 0.573 | 6.030 | 80.8 | 2.72 | 2.24 | 2.64 | 2.42 | 19.0 | 7.88 | YES | 10.280 | 10.1520 | None | 45 | YES | 0.060336 |
# Show the Attribute Names
df_house_price_data.columns
Index(['price', 'crime_rate', 'resid_area', 'air_qual', 'room_num', 'age',
'dist1', 'dist2', 'dist3', 'dist4', 'teachers', 'poor_prop', 'airport',
'n_hos_beds', 'n_hot_rooms', 'waterbody', 'rainfall', 'bus_ter',
'parks'],
dtype='object')
# Price=Price of house # Crime_rate= crime rate around the house # resid_area=Residence Area around the House # air_qual=Air quality around the House # room_num=Number o Room in the House # age= Age of House (How many yers old of House) # dist1= Distance from industrial area1 # dist2= Distance from industrial area2 # dist3= Distance from industrial area3 # dist4= Distance from industrial area4 # teachers=Howmany Teacher Stay in Aroud the House # poor_prop= Poor Population in Around the House # airport= airport Available or not Aroud the House # n_hos_beds= Numbers bads in the Hospital Around the House # n_hot_rooms= Numbers room in the Hotels Around the House # waterbody= Resource of Water Around the House # rainfall= mm rate of rainfall Around the House # bus_ter = Bus Terminal Availble in Around the House # parks= Houmany parks
colomninfo
-price:Price of the House
-Crime_rate:crime rate around the house
# Shape of the Data
# Basic Info
# Basic Discription
# Shape of the Data
df_house_price_data.shape
(506, 19)
Interpritetion
# Basic Info(function)
df_house_price_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 506 entries, 0 to 505 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 price 506 non-null float64 1 crime_rate 506 non-null float64 2 resid_area 506 non-null float64 3 air_qual 506 non-null float64 4 room_num 506 non-null float64 5 age 506 non-null float64 6 dist1 506 non-null float64 7 dist2 506 non-null float64 8 dist3 506 non-null float64 9 dist4 506 non-null float64 10 teachers 506 non-null float64 11 poor_prop 506 non-null float64 12 airport 506 non-null object 13 n_hos_beds 498 non-null float64 14 n_hot_rooms 506 non-null float64 15 waterbody 506 non-null object 16 rainfall 506 non-null int64 17 bus_ter 506 non-null object 18 parks 506 non-null float64 dtypes: float64(15), int64(1), object(3) memory usage: 75.2+ KB
Interpritation
# Basic Discription
df_house_price_data.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| price | 506.0 | 22.528854 | 9.182176 | 5.000000 | 17.025000 | 21.200000 | 25.000000 | 50.000000 |
| crime_rate | 506.0 | 3.613524 | 8.601545 | 0.006320 | 0.082045 | 0.256510 | 3.677083 | 88.976200 |
| resid_area | 506.0 | 41.136779 | 6.860353 | 30.460000 | 35.190000 | 39.690000 | 48.100000 | 57.740000 |
| air_qual | 506.0 | 0.554695 | 0.115878 | 0.385000 | 0.449000 | 0.538000 | 0.624000 | 0.871000 |
| room_num | 506.0 | 6.284634 | 0.702617 | 3.561000 | 5.885500 | 6.208500 | 6.623500 | 8.780000 |
| age | 506.0 | 68.574901 | 28.148861 | 2.900000 | 45.025000 | 77.500000 | 94.075000 | 100.000000 |
| dist1 | 506.0 | 3.971996 | 2.108532 | 1.130000 | 2.270000 | 3.385000 | 5.367500 | 12.320000 |
| dist2 | 506.0 | 3.628775 | 2.108580 | 0.920000 | 1.940000 | 3.010000 | 4.992500 | 11.930000 |
| dist3 | 506.0 | 3.960672 | 2.119797 | 1.150000 | 2.232500 | 3.375000 | 5.407500 | 12.320000 |
| dist4 | 506.0 | 3.618972 | 2.099203 | 0.730000 | 1.940000 | 3.070000 | 4.985000 | 11.940000 |
| teachers | 506.0 | 21.544466 | 2.164946 | 18.000000 | 19.800000 | 20.950000 | 22.600000 | 27.400000 |
| poor_prop | 506.0 | 12.653063 | 7.141062 | 1.730000 | 6.950000 | 11.360000 | 16.955000 | 37.970000 |
| n_hos_beds | 498.0 | 7.899767 | 1.476683 | 5.268000 | 6.634500 | 7.999000 | 9.088000 | 10.876000 |
| n_hot_rooms | 506.0 | 13.041605 | 5.238957 | 10.057600 | 11.189800 | 12.720000 | 14.170800 | 101.120000 |
| rainfall | 506.0 | 39.181818 | 12.513697 | 3.000000 | 28.000000 | 39.000000 | 50.000000 | 60.000000 |
| parks | 506.0 | 0.054454 | 0.010632 | 0.033292 | 0.046464 | 0.053507 | 0.061397 | 0.086711 |
Interpritation
crime_rate,age,resid_area,teachers,poor_prop# Data Cleaning
# null Values Handling
# outlier Treatement
# Data Cleaning
df_house_price_data.columns
Index(['price', 'crime_rate', 'resid_area', 'air_qual', 'room_num', 'age',
'dist1', 'dist2', 'dist3', 'dist4', 'teachers', 'poor_prop', 'airport',
'n_hos_beds', 'n_hot_rooms', 'waterbody', 'rainfall', 'bus_ter',
'parks'],
dtype='object')
# lets rename the columns
df_house_price_data=df_house_price_data.rename(columns={'price':'Price','crime_rate':'Crime_Rate','resid_area':'Resid_Area',
'air_qual':'Air_Quality','room_num':'Room_Num','age':'Age',
'dist1':'Dist1','dist2':'Dist2','dist3':'Dist3','dist4':'Dist4',
'teachers':'Teachers','poor_prop':'Poor_Population','airport':'Airport',
'n_hos_beds':'N_Hos_Beds','n_hot_rooms':'N_Hot_Rooms',
'waterbody':'Waterbody','rainfall':'Rainfall',
'bus_ter':'Bus_Terminal','parks':'Parks'})
df_house_price_data.head()
| Price | Crime_Rate | Resid_Area | Air_Quality | Room_Num | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | Airport | N_Hos_Beds | N_Hot_Rooms | Waterbody | Rainfall | Bus_Terminal | Parks | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 6.575 | 65.2 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | YES | 5.480 | 11.1920 | River | 23 | YES | 0.049347 |
| 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 6.421 | 78.9 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | NO | 7.332 | 12.1728 | Lake | 42 | YES | 0.046146 |
| 2 | 34.7 | 0.02729 | 37.07 | 0.469 | 7.185 | 61.1 | 5.03 | 4.86 | 5.01 | 4.97 | 22.2 | 4.03 | NO | 7.394 | 101.1200 | None | 38 | YES | 0.045764 |
| 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 6.998 | 45.8 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | YES | 9.268 | 11.2672 | Lake | 45 | YES | 0.047151 |
| 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 7.147 | 54.2 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | NO | 8.824 | 11.2896 | Lake | 55 | YES | 0.039474 |
# Let's Do the Typecasting Here
# Room_num
# Age
# Techers
# N_Hos_Beds
# N_Hot_Rooms
df_house_price_data['Room_Num']=df_house_price_data['Room_Num'].astype('int')
df_house_price_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 506 entries, 0 to 505 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Price 506 non-null float64 1 Crime_Rate 506 non-null float64 2 Resid_Area 506 non-null float64 3 Air_Quality 506 non-null float64 4 Room_Num 506 non-null int32 5 Age 506 non-null float64 6 Dist1 506 non-null float64 7 Dist2 506 non-null float64 8 Dist3 506 non-null float64 9 Dist4 506 non-null float64 10 Teachers 506 non-null float64 11 Poor_Population 506 non-null float64 12 Airport 506 non-null object 13 N_Hos_Beds 498 non-null float64 14 N_Hot_Rooms 506 non-null float64 15 Waterbody 506 non-null object 16 Rainfall 506 non-null int64 17 Bus_Terminal 506 non-null object 18 Parks 506 non-null float64 dtypes: float64(14), int32(1), int64(1), object(3) memory usage: 73.3+ KB
df_house_price_data['Age']=df_house_price_data['Age'].astype('int')
df_house_price_data['Teachers ']=df_house_price_data['Teachers'].astype('int')
df_house_price_data['N_Hos_Beds']=df_house_price_data['N_Hos_Beds '].astype('int')
df_house_price_data['N_Hot_Rooms']=df_house_price_data['N_Hot_Rooms'].astype('int')
# in Previous We did the typecasting base the on observation and Domain expertise
# Now time is for Data visulization
df_house_price_data.hist()
plt.tight_layout()
plt.show()
df_house_price_data.plot(kind= 'kde')
<Axes: ylabel='Density'>
df_house_price_data.Price.plot(kind='kde')
plt.tight_layout()
plt.show()
# Room_Num
# N_Hot_Rooms
df_house_price_data.Room_Num.hist(color='red')
plt.tight_layout()
plt.show()
df_house_price_data.N_Hot_Rooms.hist(color='red')
plt.tight_layout()
plt.show()
df_house_price_data.Room_Num.plot(kind='kde')
plt.tight_layout()
plt.show()
df_house_price_data.N_Hot_Rooms.plot(kind='kde')
plt.tight_layout()
plt.show()
df_house_price_data['Room_Num']=df_house_price_data['Room_Num'].astype('object')
df_house_price_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 506 entries, 0 to 505 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Price 506 non-null float64 1 Crime_Rate 506 non-null float64 2 Resid_Area 506 non-null float64 3 Air_Quality 506 non-null float64 4 Room_Num 506 non-null object 5 Age 506 non-null int32 6 Dist1 506 non-null float64 7 Dist2 506 non-null float64 8 Dist3 506 non-null float64 9 Dist4 506 non-null float64 10 Teachers 506 non-null float64 11 Poor_Population 506 non-null float64 12 Airport 506 non-null object 13 N_Hos_Beds 498 non-null float64 14 N_Hot_Rooms 506 non-null int32 15 Waterbody 506 non-null object 16 Rainfall 506 non-null int64 17 Bus_Terminal 506 non-null object 18 Parks 506 non-null float64 19 Teachers 506 non-null int32 dtypes: float64(12), int32(3), int64(1), object(4) memory usage: 73.3+ KB
# in Previous We did the typecasting base the on observation and Domain expertise
# Now time is for Data visulization
df_house_price_data.hist()
plt.tight_layout()
plt.show()
df_house_price_data.head()
| Price | Crime_Rate | Resid_Area | Air_Quality | Room_Num | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | Airport | N_Hos_Beds | N_Hot_Rooms | Waterbody | Rainfall | Bus_Terminal | Parks | Teachers | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 6 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | YES | 5.480 | 11 | River | 23 | YES | 0.049347 | 24 |
| 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 6 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | NO | 7.332 | 12 | Lake | 42 | YES | 0.046146 | 22 |
| 2 | 34.7 | 0.02729 | 37.07 | 0.469 | 7 | 61 | 5.03 | 4.86 | 5.01 | 4.97 | 22.2 | 4.03 | NO | 7.394 | 101 | None | 38 | YES | 0.045764 | 22 |
| 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 6 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | YES | 9.268 | 11 | Lake | 45 | YES | 0.047151 | 21 |
| 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 7 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | NO | 8.824 | 11 | Lake | 55 | YES | 0.039474 | 21 |
# Find the number of Missing Values
df_house_price_data.isnull().sum()
Price 0 Crime_Rate 0 Resid_Area 0 Air_Quality 0 Room_Num 0 Age 0 Dist1 0 Dist2 0 Dist3 0 Dist4 0 Teachers 0 Poor_Population 0 Airport 0 N_Hos_Beds 8 N_Hot_Rooms 0 Waterbody 0 Rainfall 0 Bus_Terminal 0 Parks 0 Teachers 0 dtype: int64
df_house_price_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 506 entries, 0 to 505 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Price 506 non-null float64 1 Crime_Rate 506 non-null float64 2 Resid_Area 506 non-null float64 3 Air_Quality 506 non-null float64 4 Room_Num 506 non-null object 5 Age 506 non-null int32 6 Dist1 506 non-null float64 7 Dist2 506 non-null float64 8 Dist3 506 non-null float64 9 Dist4 506 non-null float64 10 Teachers 506 non-null float64 11 Poor_Population 506 non-null float64 12 Airport 506 non-null object 13 N_Hos_Beds 498 non-null float64 14 N_Hot_Rooms 506 non-null int32 15 Waterbody 506 non-null object 16 Rainfall 506 non-null int64 17 Bus_Terminal 506 non-null object 18 Parks 506 non-null float64 19 Teachers 506 non-null int32 dtypes: float64(12), int32(3), int64(1), object(4) memory usage: 73.3+ KB
# Laws
## 1.en We HAve 10% Data Missing remove the observation
## 2.en We have persentage beetween 30 to 70 percent then we neet to inputs the missing values
## 3.en we have more than 80% remove the Attributes
round(df_house_price_data.isnull().sum()/len(df_house_price_data)*100,2)
Price 0.00 Crime_Rate 0.00 Resid_Area 0.00 Air_Quality 0.00 Room_Num 0.00 Age 0.00 Dist1 0.00 Dist2 0.00 Dist3 0.00 Dist4 0.00 Teachers 0.00 Poor_Population 0.00 Airport 0.00 N_Hos_Beds 1.58 N_Hot_Rooms 0.00 Waterbody 0.00 Rainfall 0.00 Bus_Terminal 0.00 Parks 0.00 Teachers 0.00 dtype: float64
# We Have LEss tha 2% Data is Missing From N_Hos_Beds,Lets the Remove the observation
df_house_price_data.dropna(inplace=True)
df_house_price_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 498 entries, 0 to 505 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Price 498 non-null float64 1 Crime_Rate 498 non-null float64 2 Resid_Area 498 non-null float64 3 Air_Quality 498 non-null float64 4 Room_Num 498 non-null object 5 Age 498 non-null int32 6 Dist1 498 non-null float64 7 Dist2 498 non-null float64 8 Dist3 498 non-null float64 9 Dist4 498 non-null float64 10 Teachers 498 non-null float64 11 Poor_Population 498 non-null float64 12 Airport 498 non-null object 13 N_Hos_Beds 498 non-null float64 14 N_Hot_Rooms 498 non-null int32 15 Waterbody 498 non-null object 16 Rainfall 498 non-null int64 17 Bus_Terminal 498 non-null object 18 Parks 498 non-null float64 19 Teachers 498 non-null int32 dtypes: float64(12), int32(3), int64(1), object(4) memory usage: 75.9+ KB
df_house_price_data.isnull().sum()
Price 0 Crime_Rate 0 Resid_Area 0 Air_Quality 0 Room_Num 0 Age 0 Dist1 0 Dist2 0 Dist3 0 Dist4 0 Teachers 0 Poor_Population 0 Airport 0 N_Hos_Beds 0 N_Hot_Rooms 0 Waterbody 0 Rainfall 0 Bus_Terminal 0 Parks 0 Teachers 0 dtype: int64
df_house_price_data.plot(kind='box')
plt.xticks(rotation=90)
plt.title('Outlier Detection')
plt.tight_layout()
plt.show()
df_num=df_house_price_data.select_dtypes(np.number)
# Find limit
Q1=df_num.quantile(0.25)
Q3=df_num.quantile(0.75)
IQR=Q3-Q1
df_num=df_num[(df_num>=Q1-1.5*IQR)& (df_num<=Q3+1.5*IQR)]
df_num.reset_index()
df_num=df_num.reset_index()
df_num.plot(kind='box',color='red',rot=90)
plt.show()
# concate the tretment dataframe with catacgoriacl data frame
df_cat=df_house_price_data.select_dtypes('object')
df_cat=df_cat.reset_index()
print(df_cat.shape)
(498, 5)
print(df_num.shape)
(498, 17)
# concate both
df_house_price_data=pd.concat((df_num,df_cat),axis=1)
df_house_price_data.shape
(498, 22)
df_house_price_data.isnull().sum()
index 0 Price 39 Crime_Rate 64 Resid_Area 0 Air_Quality 0 Age 0 Dist1 5 Dist2 5 Dist3 5 Dist4 5 Teachers 14 Poor_Population 7 N_Hos_Beds 0 N_Hot_Rooms 2 Rainfall 0 Parks 5 Teachers 14 index 0 Room_Num 0 Airport 0 Waterbody 0 Bus_Terminal 0 dtype: int64
df_house_price_data.dropna(inplace=True)
df_house_price_data.isnull().sum()
index 0 Price 0 Crime_Rate 0 Resid_Area 0 Air_Quality 0 Age 0 Dist1 0 Dist2 0 Dist3 0 Dist4 0 Teachers 0 Poor_Population 0 N_Hos_Beds 0 N_Hot_Rooms 0 Rainfall 0 Parks 0 Teachers 0 index 0 Room_Num 0 Airport 0 Waterbody 0 Bus_Terminal 0 dtype: int64
# Uni variant Analysis
# bivarint Analysis
# multivariant Analysis
# lets Segrigates The Numarical & Catagorical Columns
df_num =df_house_price_data.select_dtypes(include=np.number)
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# Catagorical data frame
df_cat=df_house_price_data.select_dtypes(include=['object'])
df_cat.head()
| Room_Num | Airport | Waterbody | Bus_Terminal | |
|---|---|---|---|---|
| 0 | 6 | YES | River | YES |
| 1 | 6 | NO | Lake | YES |
| 3 | 6 | YES | Lake | YES |
| 4 | 7 | NO | Lake | YES |
| 5 | 6 | YES | None | YES |
# FInd The Count Of Each Catagary Present in that Particular Columns
df_cat.Room_Num.value_counts()
6 228 5 120 7 22 4 4 8 1 3 1 Name: Room_Num, dtype: int64
# Visulies the Count
df_cat.Room_Num.value_counts().plot(kind='bar', rot=360)
plt.show()
Interpritation
df_cat.head()
| Room_Num | Airport | Waterbody | Bus_Terminal | |
|---|---|---|---|---|
| 0 | 6 | YES | River | YES |
| 1 | 6 | NO | Lake | YES |
| 3 | 6 | YES | Lake | YES |
| 4 | 7 | NO | Lake | YES |
| 5 | 6 | YES | None | YES |
# Find the Values Counts for Airport Column
df_cat.Airport.value_counts()
YES 205 NO 171 Name: Airport, dtype: int64
# Plot the Values Counts
df_cat.Airport.value_counts().plot(kind='bar')
plt.show()
Interpretation
df_cat.head(2)
| Room_Num | Airport | Waterbody | Bus_Terminal | |
|---|---|---|---|---|
| 0 | 6 | YES | River | YES |
| 1 | 6 | NO | Lake | YES |
# FInd the Count the Each catagary in the that column
df_cat.Waterbody.value_counts()
River 139 None 112 Lake 70 Lake and River 55 Name: Waterbody, dtype: int64
# visulize the count
df_cat.Waterbody.value_counts().plot(kind='bar',color='pink')
plt.title('Count of Waterbody')
plt.ylabel('counts')
plt.xlabel('Waterbody')
plt.xticks(rotation=45)
plt.grid()
plt.show()
Interpretation
df_cat.head()
| Room_Num | Airport | Waterbody | Bus_Terminal | |
|---|---|---|---|---|
| 0 | 6 | YES | River | YES |
| 1 | 6 | NO | Lake | YES |
| 3 | 6 | YES | Lake | YES |
| 4 | 7 | NO | Lake | YES |
| 5 | 6 | YES | None | YES |
# Find the Values Counts for Bus_Terminal Column
df_cat.Bus_Terminal.value_counts()
YES 376 Name: Bus_Terminal, dtype: int64
# visulize the count
df_cat.Bus_Terminal.value_counts().plot(kind='bar',color='pink')
plt.title('Count of Bus_Terminal')
plt.ylabel('counts')
plt.xlabel('Bus_Terminal')
plt.xticks(rotation=45)
plt.grid()
plt.show()
InterPretation
# Drop the column
df_house_price_data.drop('Bus_Terminal',axis=1,inplace=True)
df_house_price_data.shape
(376, 21)
df_house_price_data.shape[0]
376
df_house_price_data.shape[1]
21
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
df_num.Price.plot(kind='kde',color='red')
plt.title('kde Plot of Price')
plt.grid()
plt.show()
# find the minimum values
df_num.Price.min()
7.0
# find the maximum values
df_num.Price.max()
36.4
# find the Average Values
df_num.Price.mean()
21.846808510638297
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Crime_Rate.min()
0.00632
# find the maximum values
df_num.Crime_Rate.max()
8.98296
# find the average values
df_num.Crime_Rate.mean()
1.2156331382978725
df_num.Crime_Rate.plot(kind='kde',color='red')
plt.title('kde Plot of Crime_Rate')
plt.grid()
plt.show()
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Resid_Area.min()
30.74
# find the maximum values
df_num.Resid_Area.max()
57.74
# find the average values
df_num.Resid_Area.mean()
40.443617021276594
df_num.Resid_Area.plot(kind='kde',color='red')
plt.title('kde Plot of Resid_Area')
plt.grid()
plt.show()
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Air_Quality.min()
0.385
# find the maximum values
df_num.Air_Quality.max()
0.871
# find the average values
df_num.Air_Quality.mean()
0.5334140957446809
df_num.Air_Quality.plot(kind='kde',color='red')
plt.title('kde Plot of Air_Quality')
plt.grid()
plt.show()
Interpretatio
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Age.min()
2
# find the maximum values
df_num.Age.max()
100
# find the average values
df_num.Age.mean()
63.922872340425535
df_num.Age.plot(kind='kde',color='black')
plt.title('kde Plot of Age')
plt.grid()
plt.show()
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Dist1.min()
1.34
# find the maximum values
df_num.Dist1.max()
9.44
# find the average values
df_num.Dist1.mean()
4.3179521276595745
df_num.Dist1.plot(kind='kde',color='black')
plt.title('kde Plot of Dist1')
plt.grid()
plt.show()
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Dist2.min()
1.04
# find the maximum values
df_num.Dist2.max()
9.11
# find the average values
df_num.Dist2.mean()
3.9738297872340427
df_num.Dist2.plot(kind='kde',color='black')
plt.title('kde Plot of Dist2')
plt.grid()
plt.show()
Interpretation
# find the minimum values
df_num.Dist3.min()
1.55
# find the maximum values
df_num.Dist3.max()
9.48
# find the average values
df_num.Dist2.mean()
3.9738297872340427
df_num.Dist3.plot(kind='kde',color='black')
plt.title('kde Plot of Dist3')
plt.grid()
plt.show()
Interpreatation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Dist4.min()
1.05
# find the maximum values
df_num.Dist4.max()
9.16
# find the avarage values
df_num.Dist4.mean()
3.9608244680851064
df_num.Dist4.plot(kind='kde',color='black')
plt.title('kde Plot of Dist4')
plt.grid()
plt.show()
Interpreatation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Teachers.min()
18.8
# find the maximum values
df_num.Teachers.max()
25.3
# find the average values
df_num.Teachers.mean()
21.4718085106383
df_num.Teachers.plot(kind='kde',color='black')
plt.title('kde Plot of Teachers')
plt.grid()
plt.show()
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Poor_Population.min()
1.98
# find the maximum values
df_num.Poor_Population.max()
30.81
# find the average values
df_num.Poor_Population.mean()
11.774973404255318
df_num.Poor_Population.plot(kind='kde',color='black')
plt.title('kde Plot of Poor_Population')
plt.grid()
plt.show()
Interpretation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.N_Hos_Beds.min()
5.268
# find the maximum values
df_num.N_Hos_Beds.max()
10.668
# find the average values
df_num.N_Hos_Beds.mean()
7.876297872340427
df_num.N_Hos_Beds.plot(kind='kde',color='blue')
plt.title('kde Plot of N_Hos_Beds')
plt.grid()
plt.show()
Interpritation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.N_Hot_Rooms.min()
10.0
# find the maximum values
df_num.N_Hot_Rooms.max()
15.0
# find the average values
df_num.N_Hot_Rooms.mean()
12.5
df_num.N_Hot_Rooms.plot(kind='kde',color='blue')
plt.title('kde Plot of N_Hot_Rooms')
plt.grid()
plt.show()
Interpritation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Rainfall.min()
3
# find the maximum values
df_num.Rainfall.max()
60
# find the average values
df_num.Rainfall.mean()
38.598404255319146
df_num.Rainfall.plot(kind='kde',color='blue')
plt.title('kde Plot of Rainfall')
plt.grid()
plt.show()
Interpritation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
# find the minimum values
df_num.Parks.min()
0.033291762
# find the maximum values
df_num.Parks.max()
0.083032787
# find the Average values
df_num.Parks.mean()
0.05268840343882978
df_num.Parks.plot(kind='kde',color='blue')
plt.title('kde Plot of Parks')
plt.grid()
plt.show()
Interpritation
df_num.head()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24.0 | 0.00632 | 32.31 | 0.538 | 65 | 4.35 | 3.81 | 4.18 | 4.01 | 24.7 | 4.98 | 5.480 | 11.0 | 23 | 0.049347 | 24.0 | 0 |
| 1 | 1 | 21.6 | 0.02731 | 37.07 | 0.469 | 78 | 4.99 | 4.70 | 5.12 | 5.06 | 22.2 | 9.14 | 7.332 | 12.0 | 42 | 0.046146 | 22.0 | 1 |
| 3 | 3 | 33.4 | 0.03237 | 32.18 | 0.458 | 45 | 6.21 | 5.93 | 6.16 | 5.96 | 21.3 | 2.94 | 9.268 | 11.0 | 45 | 0.047151 | 21.0 | 3 |
| 4 | 4 | 36.2 | 0.06905 | 32.18 | 0.458 | 54 | 6.16 | 5.86 | 6.37 | 5.86 | 21.3 | 5.33 | 8.824 | 11.0 | 55 | 0.039474 | 21.0 | 4 |
| 5 | 5 | 28.7 | 0.02985 | 32.18 | 0.458 | 58 | 6.22 | 5.80 | 6.23 | 5.99 | 21.3 | 5.21 | 7.174 | 14.0 | 53 | 0.045910 | 21.0 | 5 |
df_cat.head()
| Room_Num | Airport | Waterbody | Bus_Terminal | |
|---|---|---|---|---|
| 0 | 6 | YES | River | YES |
| 1 | 6 | NO | Lake | YES |
| 3 | 6 | YES | Lake | YES |
| 4 | 7 | NO | Lake | YES |
| 5 | 6 | YES | None | YES |
# step1: do the Bivariant Analysis on Numarical Data
# find the corelation
df_num.corr()
| index | Price | Crime_Rate | Resid_Area | Air_Quality | Age | Dist1 | Dist2 | Dist3 | Dist4 | Teachers | Poor_Population | N_Hos_Beds | N_Hot_Rooms | Rainfall | Parks | Teachers | index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| index | 1.000000 | -0.130497 | 0.521741 | 0.326491 | 0.322834 | 0.081943 | -0.228977 | -0.233908 | -0.236092 | -0.220572 | -0.200740 | 0.074148 | 0.058926 | 0.080481 | 0.028077 | 0.301590 | -0.261536 | 1.000000 |
| Price | -0.130497 | 1.000000 | -0.424735 | -0.539404 | -0.518091 | -0.556358 | 0.368112 | 0.365491 | 0.364496 | 0.366804 | 0.398508 | -0.750503 | 0.037783 | -0.070135 | -0.007160 | -0.491044 | 0.402793 | -0.130497 |
| Crime_Rate | 0.521741 | -0.424735 | 1.000000 | 0.554473 | 0.649419 | 0.418289 | -0.456078 | -0.455079 | -0.458358 | -0.447356 | -0.302125 | 0.393205 | -0.022331 | 0.047310 | 0.026116 | 0.595869 | -0.348126 | 0.521741 |
| Resid_Area | 0.326491 | -0.539404 | 0.554473 | 1.000000 | 0.753000 | 0.598148 | -0.700206 | -0.703032 | -0.703908 | -0.702769 | -0.289787 | 0.562430 | 0.011938 | -0.042359 | 0.014066 | 0.695053 | -0.341594 | 0.326491 |
| Air_Quality | 0.322834 | -0.518091 | 0.649419 | 0.753000 | 1.000000 | 0.688243 | -0.745603 | -0.747011 | -0.746911 | -0.742947 | -0.189400 | 0.539936 | -0.047659 | 0.035111 | 0.027494 | 0.904715 | -0.224335 | 0.322834 |
| Age | 0.081943 | -0.556358 | 0.418289 | 0.598148 | 0.688243 | 1.000000 | -0.690885 | -0.690063 | -0.690537 | -0.692938 | -0.275263 | 0.638895 | -0.000220 | 0.003201 | 0.009894 | 0.643731 | -0.281424 | 0.081943 |
| Dist1 | -0.228977 | 0.368112 | -0.456078 | -0.700206 | -0.745603 | -0.690885 | 1.000000 | 0.997494 | 0.997356 | 0.993068 | 0.272658 | -0.469418 | -0.031694 | 0.005985 | 0.028576 | -0.692236 | 0.290859 | -0.228977 |
| Dist2 | -0.233908 | 0.365491 | -0.455079 | -0.703032 | -0.747011 | -0.690063 | 0.997494 | 1.000000 | 0.997741 | 0.992902 | 0.278089 | -0.466593 | -0.035692 | 0.008342 | 0.023889 | -0.694414 | 0.297077 | -0.233908 |
| Dist3 | -0.236092 | 0.364496 | -0.458358 | -0.703908 | -0.746911 | -0.690537 | 0.997356 | 0.997741 | 1.000000 | 0.993118 | 0.276678 | -0.465286 | -0.029549 | 0.009390 | 0.022670 | -0.697132 | 0.295347 | -0.236092 |
| Dist4 | -0.220572 | 0.366804 | -0.447356 | -0.702769 | -0.742947 | -0.692938 | 0.993068 | 0.992902 | 0.993118 | 1.000000 | 0.270843 | -0.471031 | -0.023569 | 0.016509 | 0.034504 | -0.692037 | 0.288949 | -0.220572 |
| Teachers | -0.200740 | 0.398508 | -0.302125 | -0.289787 | -0.189400 | -0.275263 | 0.272658 | 0.278089 | 0.276678 | 0.270843 | 1.000000 | -0.250510 | -0.026724 | -0.071389 | -0.029864 | -0.180236 | 0.988189 | -0.200740 |
| Poor_Population | 0.074148 | -0.750503 | 0.393205 | 0.562430 | 0.539936 | 0.638895 | -0.469418 | -0.466593 | -0.465286 | -0.471031 | -0.250510 | 1.000000 | -0.051604 | -0.002933 | 0.001505 | 0.528261 | -0.266741 | 0.074148 |
| N_Hos_Beds | 0.058926 | 0.037783 | -0.022331 | 0.011938 | -0.047659 | -0.000220 | -0.031694 | -0.035692 | -0.029549 | -0.023569 | -0.026724 | -0.051604 | 1.000000 | -0.028647 | 0.066010 | -0.077238 | -0.036978 | 0.058926 |
| N_Hot_Rooms | 0.080481 | -0.070135 | 0.047310 | -0.042359 | 0.035111 | 0.003201 | 0.005985 | 0.008342 | 0.009390 | 0.016509 | -0.071389 | -0.002933 | -0.028647 | 1.000000 | 0.078927 | 0.063127 | -0.066646 | 0.080481 |
| Rainfall | 0.028077 | -0.007160 | 0.026116 | 0.014066 | 0.027494 | 0.009894 | 0.028576 | 0.023889 | 0.022670 | 0.034504 | -0.029864 | 0.001505 | 0.066010 | 0.078927 | 1.000000 | 0.032996 | -0.041106 | 0.028077 |
| Parks | 0.301590 | -0.491044 | 0.595869 | 0.695053 | 0.904715 | 0.643731 | -0.692236 | -0.694414 | -0.697132 | -0.692037 | -0.180236 | 0.528261 | -0.077238 | 0.063127 | 0.032996 | 1.000000 | -0.212599 | 0.301590 |
| Teachers | -0.261536 | 0.402793 | -0.348126 | -0.341594 | -0.224335 | -0.281424 | 0.290859 | 0.297077 | 0.295347 | 0.288949 | 0.988189 | -0.266741 | -0.036978 | -0.066646 | -0.041106 | -0.212599 | 1.000000 | -0.261536 |
| index | 1.000000 | -0.130497 | 0.521741 | 0.326491 | 0.322834 | 0.081943 | -0.228977 | -0.233908 | -0.236092 | -0.220572 | -0.200740 | 0.074148 | 0.058926 | 0.080481 | 0.028077 | 0.301590 | -0.261536 | 1.000000 |
# Draw the hit map for coralation
sns.heatmap(df_num.corr(),annot=True)
sns.set(rc={"figure.figsize":(10,10)})
# Dist1 - Parks
# Dist2 - Parks
# Dist3 - Parks
# Dist4 - Parks
# Price - Poor_Population
# Resid_Area - Dist1
# Resid_Area - Dist2
# Resid_Area - Dist3
# Resid_Area - Dist4
# Air_Quality - Dist1
# Air_Quality - Dist2
# Air_Quality - Dist3
# Air_Quality - Dist4
# Age - Dist1
# Age - Dist2
# Age - Dist3
# Age - Dist4
# Resid_Area - Air_Quality
# Age - Air_Quality
# Parks - Resid_Area
# Air_Quality - Parks
sns.scatterplot(x='Dist1',y='Parks',data=df_num)
plt.xlabel('Dist1',fontsize=30)
plt.ylabel('Parks',fontsize=30)
plt.title('Relation between Dist1 and Parks',fontsize=40)
plt.show()
Interpretation
sns.scatterplot(x='Dist2',y='Parks',data=df_num)
plt.xlabel('Dist2',fontsize=30)
plt.ylabel('Parks',fontsize=30)
plt.title('Relation between Dist2 and Parks',fontsize=40)
plt.show()
Interpretation
sns.scatterplot(x='Dist3',y='Parks',data=df_num)
plt.xlabel('Dist3',fontsize=30)
plt.ylabel('Parks',fontsize=30)
plt.title('Relation between Dist3 and Parks',fontsize=40)
plt.show()
Interpretation
-there is slitly negative corelation
sns.scatterplot(x='Dist4',y='Parks',data=df_num)
plt.xlabel('Dist4',fontsize=30)
plt.ylabel('Parks',fontsize=30)
plt.title('Relation between Dist4 and Parks',fontsize=40)
plt.show()
sns.scatterplot(x='Price',y='Poor_Population',data=df_num)
plt.xlabel('Price',fontsize=30)
plt.ylabel('Poor_Population',fontsize=30)
plt.title('Relation between Price and Poor_Population',fontsize=40)
plt.show()
Interpretation
-there is slitly negative corelation
sns.scatterplot(x='Resid_Area',y='Dist1',data=df_num)
plt.xlabel('Resid_Area',fontsize=30)
plt.ylabel('Dist1',fontsize=30)
plt.title('Relation between Resid_Area and Dist1',fontsize=40)
plt.show()
Interpretation
sns.scatterplot(x='Resid_Area',y='Dist2',data=df_num)
plt.xlabel('Resid_Area',fontsize=30)
plt.ylabel('Dist2',fontsize=30)
plt.title('Relation between Resid_Area and Dist2',fontsize=40)
plt.show()
sns.scatterplot(x='Resid_Area',y='Dist3',data=df_num)
plt.xlabel('Resid_Area',fontsize=30)
plt.ylabel('Dist3',fontsize=30)
plt.title('Relation between Resid_Area and Dist3',fontsize=40)
plt.show()
sns.scatterplot(x='Resid_Area',y='Dist4',data=df_num)
plt.xlabel('Resid_Area',fontsize=30)
plt.ylabel('Dist4',fontsize=30)
plt.title('Relation between Resid_Area and Dist4',fontsize=40)
plt.show()
sns.scatterplot(x='Air_Quality',y='Dist1',data=df_num)
plt.xlabel('Air_Quality',fontsize=30)
plt.ylabel('Dist1',fontsize=30)
plt.title('Relation between Air_Quality and Dist1',fontsize=40)
plt.show()
sns.scatterplot(x='Air_Quality',y='Dist2',data=df_num)
plt.xlabel('Air_Quality',fontsize=30)
plt.ylabel('Dist2',fontsize=30)
plt.title('Relation between Air_Quality and Dist2',fontsize=40)
plt.show()
sns.scatterplot(x='Air_Quality',y='Dist3',data=df_num)
plt.xlabel('Air_Quality',fontsize=30)
plt.ylabel('Dist3',fontsize=30)
plt.title('Relation between Air_Quality and Dist3',fontsize=40)
plt.show()
sns.scatterplot(x='Air_Quality',y='Dist4',data=df_num)
plt.xlabel('Air_Quality',fontsize=30)
plt.ylabel('Dist4',fontsize=30)
plt.title('Relation between Air_Quality and Dist4',fontsize=40)
plt.show()
sns.scatterplot(x='Age',y='Dist1',data=df_num)
plt.xlabel('Age',fontsize=30)
plt.ylabel('Dist1',fontsize=30)
plt.title('Relation between Age and Dist1',fontsize=40)
plt.show()
sns.scatterplot(x='Age',y='Dist2',data=df_num)
plt.xlabel('Age',fontsize=30)
plt.ylabel('Dist2',fontsize=30)
plt.title('Relation between Age and Dist2',fontsize=40)
plt.show()
sns.scatterplot(x='Dist3',y='Age',data=df_num)
plt.xlabel('Dist3',fontsize=30)
plt.ylabel('Age',fontsize=30)
plt.title('Relation between Dist3 and Age',fontsize=40)
plt.show()
Interpretation
sns.scatterplot(x='Age',y='Dist4',data=df_num)
plt.xlabel('Age',fontsize=30)
plt.ylabel('Dist4',fontsize=30)
plt.title('Relation between Age and Dist4',fontsize=40)
plt.show()
sns.scatterplot(x='Resid_Area',y='Air_Quality',data=df_num)
plt.xlabel('Resid_Area',fontsize=30)
plt.ylabel('Air_Quality',fontsize=30)
plt.title('Relation between Resid_Area and Air_Quality',fontsize=40)
plt.show()
sns.scatterplot(x='Age',y='Air_Quality',data=df_num)
plt.xlabel('Age',fontsize=30)
plt.ylabel('Air_Quality',fontsize=30)
plt.title('Relation between Age and Air_Quality',fontsize=40)
plt.show()
sns.scatterplot(x='Parks',y='Resid_Area',data=df_num)
plt.xlabel('Parks',fontsize=30)
plt.ylabel('Resid_Area',fontsize=30)
plt.title('Relation between Parks and Resid_Area',fontsize=40)
plt.show()
sns.scatterplot(x='Air_Quality',y='Parks',data=df_num)
plt.xlabel('Air_Quality',fontsize=30)
plt.ylabel('Parks',fontsize=30)
plt.title('Relation between Air_Quality and Parks',fontsize=40)
plt.show()
Interpretation
# Parks
# Air_Quality
# Poor_Population
# Price
df_cat.head()
| Room_Num | Airport | Waterbody | Bus_Terminal | |
|---|---|---|---|---|
| 0 | 6 | YES | River | YES |
| 1 | 6 | NO | Lake | YES |
| 3 | 6 | YES | Lake | YES |
| 4 | 7 | NO | Lake | YES |
| 5 | 6 | YES | None | YES |
x='Room_Num'
y='Parks'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Room_Num',fontsize=20)
plt.ylabel('Parks',fontsize=20)
plt.show()
sns.barplot(x='Room_Num',y='Parks',data=df_house_price_data)
plt.show()
x='Room_Num'
y='Air_Quality'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Room_Num',fontsize=20)
plt.ylabel('Air_Quality',fontsize=20)
plt.show()
x='Room_Num'
y='Poor_Population'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Room_Num',fontsize=20)
plt.ylabel('Poor_Population',fontsize=20)
plt.show()
Interpretatin
x='Room_Num'
y='Price'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Room_Num',fontsize=20)
plt.ylabel('Price',fontsize=20)
plt.show()
Interpretation
x='Airport'
y='Parks'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Room_Num',fontsize=20)
plt.ylabel('Parks',fontsize=20)
plt.show()
Interpretation
x='Airport'
y='Air_Quality'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Airport',fontsize=20)
plt.ylabel('Air_Quality',fontsize=20)
plt.show()
Interpretation
x='Airport'
y='Poor_Population'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Airport',fontsize=20)
plt.ylabel('Poor_Population',fontsize=20)
plt.show()
Interpretation
x='Airport'
y='Price'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Airport',fontsize=20)
plt.ylabel('Price',fontsize=20)
plt.show()
Interpretation
x='Waterbody'
y='Parks'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Waterbody',fontsize=20)
plt.ylabel('Parks',fontsize=20)
plt.show()
Interpretation
x='Waterbody'
y='Parks'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Waterbody',fontsize=20)
plt.ylabel('Parks',fontsize=20)
plt.show()
Interpretation
x='Waterbody'
y='Air_Quality'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Waterbody',fontsize=20)
plt.ylabel('Air_Quality',fontsize=20)
plt.show()
Interpretation
x='Waterbody'
y='Poor_Population'
plt.bar(x,y,data=df_house_price_data)
plt.xlabel('Waterbody',fontsize=20)
plt.ylabel('Poor_Population',fontsize=20)
plt.show()
Interpretation
# Add hue
# cross tab
# Pivot table
# Pairs 1
# col1: Prices, col2:Parks, col3: Room_Num
# col1: Prices, col2:Air_Quality, col3: Room_Num
# col1: Prices, col2:Poor_Population, col3: Room_Num
# col1: Prices, col2:Parks, col3: Airports
# col1: Prices, col2:Air_Quality, col3: Airport
# col1: Prices, col2:Poor_Population, col3: Airport
# col1: Prices, col2:Parks, col3: Waterbody
# col1: Prices, col2:Air_Quality, col3: Waterbody
# col1: Prices, col2:Poor_PoPulation, col3: Waterbody
# Note: my Column 3 should be the hue Parametre
sns.set(rc={"figure.figsize":(10,10)})
df_house_price_data.columns
Index(['index', 'Price', 'Crime_Rate', 'Resid_Area', 'Air_Quality', 'Age',
'Dist1', 'Dist2', 'Dist3', 'Dist4', 'Teachers', 'Poor_Population',
'N_Hos_Beds', 'N_Hot_Rooms', 'Rainfall', 'Parks', 'Teachers ', 'index',
'Room_Num', 'Airport', 'Waterbody'],
dtype='object')
# scater Plot
sns.scatterplot(x="Price",y="Parks",hue="Room_Num",data=df_house_price_data)
plt.show()
INTERPRETATION
When number of Parks high (0.08) that time the Price is low (13 lacs) the average numbers of Room_Num is that senario is 3
Parks when the no of parks is low (0.04) that time the price of the house is high (36 lacs) in that senario we have average room number is 8.
# scater Plot
sns.scatterplot(x="Price",y="Air_Quality",hue="Room_Num",data=df_house_price_data)
plt.show()
**Interpretation
when the price is low(13 lacs) that time the airquality is really good then average number of room in that senario is 4
when the price is high (36 lacs)that time the airquality is really bed 0.45 then average number of room in that senario is 8
# scater Plot
sns.scatterplot(x="Price",y="Poor_Population",hue="Room_Num",data=df_house_price_data)
plt.show()
**Interpretation
when the price is low(8 lacs) that time the Poor_Population is 30 then average number of room in that senario is 4
when the price is high (36 lacs)that time the Poor_Population is 8 then average number of room in that senario is 8
# scater Plot
sns.scatterplot(x="Price",y="Parks",hue="Airport",data=df_house_price_data)
plt.show()
Interpretation
When we have highest Price 37 lacs Then the no of Parks is 0.045 that time ther is Airports is Present.
When we have lowest Price 17 lacs Then the no of Parks is 0.08 that time ther is Airports is Present
# scater Plot
sns.scatterplot(x="Price",y="Air_Quality",hue="Airport",data=df_house_price_data)
plt.show()
Interpretation
When we have highest Price 37 lacs Then the Air_Quality is 0.49 that time ther is Airports is Present.
When we have lowest Price 17 lacs Then the Air_Quality is 0.9 that time ther is Airports is Present
# scater Plot
sns.scatterplot(x="Price",y="Poor_Population",hue="Airport",data=df_house_price_data)
plt.show()
Interpretation
When we have highest Price 37 lacs Then the Poor_Population is 3 that time ther is Airports is Present.
When we have lowest Price 8 lacs Then the Poor_Population is 28 that time ther is Airports is Present
# scater Plot
sns.scatterplot(x="Price",y="Parks",hue="Waterbody",data=df_house_price_data)
plt.show()
interpretation
# scater Plot
sns.scatterplot(x="Price",y="Air_Quality",hue="Waterbody",data=df_house_price_data)
plt.show()
interpretation
# scater Plot
sns.scatterplot(x="Price",y="Poor_Population",hue="Waterbody",data=df_house_price_data)
plt.show()
interpretation
# Room_Num - Parks
# Room_Num - Air_Quality
# Room_Num - Poor_Populatin
# Airport - Parks
# Airport - Air_Quality
# Airport - Poor_Populatin
# Waterbody - Parks
# Waterbody - Air_Quality
# Waterbody - Poor_Populatin
# col1:Room_Num, col2: Airporat, col3: Price
# col1:Room_Num, col2: Waterbody, col3: Price
# col1:Airport, col2: Waterbody, col3: Price
df_house_price_data.columns
Index(['index', 'Price', 'Crime_Rate', 'Resid_Area', 'Air_Quality', 'Age',
'Dist1', 'Dist2', 'Dist3', 'Dist4', 'Teachers', 'Poor_Population',
'N_Hos_Beds', 'N_Hot_Rooms', 'Rainfall', 'Parks', 'Teachers ', 'index',
'Room_Num', 'Airport', 'Waterbody'],
dtype='object')
pd.crosstab(df_house_price_data.Room_Num,df_house_price_data.Airport,values=df_house_price_data.Price,aggfunc='mean')
| Airport | NO | YES |
|---|---|---|
| Room_Num | ||
| 3 | NaN | 27.500000 |
| 4 | 17.533333 | 11.800000 |
| 5 | 17.485000 | 19.328333 |
| 6 | 22.522449 | 22.969231 |
| 7 | 31.855556 | 31.684615 |
| 8 | 21.900000 | NaN |
Interpretation
pd.crosstab(df_house_price_data.Room_Num,df_house_price_data.Waterbody,values=df_house_price_data.Price,aggfunc='mean')
| Waterbody | Lake | Lake and River | None | River |
|---|---|---|---|---|
| Room_Num | ||||
| 3 | NaN | NaN | NaN | 27.500000 |
| 4 | 21.900000 | NaN | 14.600000 | 13.950000 |
| 5 | 18.383333 | 18.200000 | 18.150000 | 18.757500 |
| 6 | 22.871795 | 22.057143 | 22.574286 | 23.202381 |
| 7 | 32.116667 | 32.600000 | 31.033333 | 31.600000 |
| 8 | NaN | NaN | NaN | 21.900000 |
Interpretation
pd.crosstab(df_house_price_data.Airport,df_house_price_data.Waterbody,values=df_house_price_data.Price,aggfunc='mean')
| Waterbody | Lake | Lake and River | None | River |
|---|---|---|---|---|
| Airport | ||||
| NO | 21.385714 | 20.003226 | 20.655319 | 21.966154 |
| YES | 22.595238 | 22.695833 | 21.643077 | 22.924324 |
Interpretation
pd.pivot_table(data=df_house_price_data,
values='Price',
index='Airport',
columns='Waterbody',
aggfunc='mean')
| Waterbody | Lake | Lake and River | None | River |
|---|---|---|---|---|
| Airport | ||||
| NO | 21.385714 | 20.003226 | 20.655319 | 21.966154 |
| YES | 22.595238 | 22.695833 | 21.643077 | 22.924324 |
Interpretation
pd.pivot_table(data=df_house_price_data,
values='Price',
index='Room_Num',
columns='Waterbody',
aggfunc='mean')
| Waterbody | Lake | Lake and River | None | River |
|---|---|---|---|---|
| Room_Num | ||||
| 3 | NaN | NaN | NaN | 27.500000 |
| 4 | 21.900000 | NaN | 14.600000 | 13.950000 |
| 5 | 18.383333 | 18.200000 | 18.150000 | 18.757500 |
| 6 | 22.871795 | 22.057143 | 22.574286 | 23.202381 |
| 7 | 32.116667 | 32.600000 | 31.033333 | 31.600000 |
| 8 | NaN | NaN | NaN | 21.900000 |
Interpretation
pd.pivot_table(data=df_house_price_data,
values='Price',
index='Room_Num',
columns='Airport',
aggfunc='mean')
| Airport | NO | YES |
|---|---|---|
| Room_Num | ||
| 3 | NaN | 27.500000 |
| 4 | 17.533333 | 11.800000 |
| 5 | 17.485000 | 19.328333 |
| 6 | 22.522449 | 22.969231 |
| 7 | 31.855556 | 31.684615 |
| 8 | 21.900000 | NaN |
Interpretation